; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -S -data-layout="E-n64" | FileCheck %s --check-prefixes=ALL,BE
; RUN: opt < %s -passes=instcombine -S -data-layout="e-n64" | FileCheck %s --check-prefixes=ALL,LE

; External function with no body in this file; the *_use1/*_use2 tests call it
; to give an intermediate <2 x i8> value an extra use.
declare void @use(<2 x i8>)

; Insert into element 0 of a <2 x i8> that is a bitcast view of an i16.
; i16 is a common type, so we can convert independently of the data layout.
; Endian determines if a shift is needed (and so the transform is avoided):
;   LE - the inserted byte lands in the low 8 bits, so the expected output is
;        mask (and -256) + zext + or, with no shift.
;   BE - a shift would be needed, so the bitcast/insertelement/bitcast
;        sequence is expected to be left alone.
; The insert index is written as i8 in the input; the expected output prints
; it as i64.

define i16 @insert0_v2i8(i16 %x, i8 %y) {
; BE-LABEL: @insert0_v2i8(
; BE-NEXT:    [[V:%.*]] = bitcast i16 [[X:%.*]] to <2 x i8>
; BE-NEXT:    [[I:%.*]] = insertelement <2 x i8> [[V]], i8 [[Y:%.*]], i64 0
; BE-NEXT:    [[R:%.*]] = bitcast <2 x i8> [[I]] to i16
; BE-NEXT:    ret i16 [[R]]
;
; LE-LABEL: @insert0_v2i8(
; LE-NEXT:    [[TMP1:%.*]] = and i16 [[X:%.*]], -256
; LE-NEXT:    [[TMP2:%.*]] = zext i8 [[Y:%.*]] to i16
; LE-NEXT:    [[R:%.*]] = or i16 [[TMP1]], [[TMP2]]
; LE-NEXT:    ret i16 [[R]]
;
  %v = bitcast i16 %x to <2 x i8>
  %i = insertelement <2 x i8> %v, i8 %y, i8 0
  %r = bitcast <2 x i8> %i to i16
  ret i16 %r
}

; Insert into element 1 (the last element) of a <2 x i8> that is a bitcast
; view of an i16.
; i16 is a common type, so we can convert independently of the data layout.
; Endian determines if a shift is needed (and so the transform is avoided):
;   BE - the inserted byte lands in the low 8 bits, so the expected output is
;        mask (and -256) + zext + or, with no shift.
;   LE - a shift would be needed, so the bitcast/insertelement/bitcast
;        sequence is expected to be left alone.

define i16 @insert1_v2i8(i16 %x, i8 %y) {
; BE-LABEL: @insert1_v2i8(
; BE-NEXT:    [[TMP1:%.*]] = and i16 [[X:%.*]], -256
; BE-NEXT:    [[TMP2:%.*]] = zext i8 [[Y:%.*]] to i16
; BE-NEXT:    [[R:%.*]] = or i16 [[TMP1]], [[TMP2]]
; BE-NEXT:    ret i16 [[R]]
;
; LE-LABEL: @insert1_v2i8(
; LE-NEXT:    [[V:%.*]] = bitcast i16 [[X:%.*]] to <2 x i8>
; LE-NEXT:    [[I:%.*]] = insertelement <2 x i8> [[V]], i8 [[Y:%.*]], i64 1
; LE-NEXT:    [[R:%.*]] = bitcast <2 x i8> [[I]] to i16
; LE-NEXT:    ret i16 [[R]]
;
  %v = bitcast i16 %x to <2 x i8>
  %i = insertelement <2 x i8> %v, i8 %y, i8 1
  %r = bitcast <2 x i8> %i to i16
  ret i16 %r
}

; Insert into element 0 of a <4 x i8> that is a bitcast view of an i32.
; i32 is a common type, so we can convert independently of the data layout.
; Endian determines if a shift is needed (and so the transform is avoided):
;   LE - the inserted byte lands in the low 8 bits, so the expected output is
;        mask (and -256) + zext + or, with no shift.
;   BE - a shift would be needed, so the bitcast/insertelement/bitcast
;        sequence is expected to be left alone.

define i32 @insert0_v4i8(i32 %x, i8 %y) {
; BE-LABEL: @insert0_v4i8(
; BE-NEXT:    [[V:%.*]] = bitcast i32 [[X:%.*]] to <4 x i8>
; BE-NEXT:    [[I:%.*]] = insertelement <4 x i8> [[V]], i8 [[Y:%.*]], i64 0
; BE-NEXT:    [[R:%.*]] = bitcast <4 x i8> [[I]] to i32
; BE-NEXT:    ret i32 [[R]]
;
; LE-LABEL: @insert0_v4i8(
; LE-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], -256
; LE-NEXT:    [[TMP2:%.*]] = zext i8 [[Y:%.*]] to i32
; LE-NEXT:    [[R:%.*]] = or i32 [[TMP1]], [[TMP2]]
; LE-NEXT:    ret i32 [[R]]
;
  %v = bitcast i32 %x to <4 x i8>
  %i = insertelement <4 x i8> %v, i8 %y, i8 0
  %r = bitcast <4 x i8> %i to i32
  ret i32 %r
}

; Negative test - the inserted scalar is a floating-point 'half'.
; i32 is a common type, so the integer type itself would not block the
; conversion, but the half type cannot be used in a zext instruction, so the
; transform is avoided for both endians (hence the shared ALL checks).

define i32 @insert0_v2half(i32 %x, half %y) {
; ALL-LABEL: @insert0_v2half(
; ALL-NEXT:    [[V:%.*]] = bitcast i32 [[X:%.*]] to <2 x half>
; ALL-NEXT:    [[I:%.*]] = insertelement <2 x half> [[V]], half [[Y:%.*]], i64 0
; ALL-NEXT:    [[R:%.*]] = bitcast <2 x half> [[I]] to i32
; ALL-NEXT:    ret i32 [[R]]
;
  %v = bitcast i32 %x to <2 x half>
  %i = insertelement <2 x half> %v, half %y, i8 0
  %r = bitcast <2 x half> %i to i32
  ret i32 %r
}

; Insert into element 0 of a <4 x i16> that is a bitcast view of an i64.
; i64 is a legal type (both RUN lines specify "n64" in the data layout), so we
; can convert based on the data layout.
; Endian determines if a shift is needed (and so the transform is avoided):
;   LE - the inserted i16 lands in the low 16 bits, so the expected output is
;        mask (and -65536) + zext + or, with no shift.
;   BE - a shift would be needed, so the bitcast/insertelement/bitcast
;        sequence is expected to be left alone.

define i64 @insert0_v4i16(i64 %x, i16 %y) {
; BE-LABEL: @insert0_v4i16(
; BE-NEXT:    [[V:%.*]] = bitcast i64 [[X:%.*]] to <4 x i16>
; BE-NEXT:    [[I:%.*]] = insertelement <4 x i16> [[V]], i16 [[Y:%.*]], i64 0
; BE-NEXT:    [[R:%.*]] = bitcast <4 x i16> [[I]] to i64
; BE-NEXT:    ret i64 [[R]]
;
; LE-LABEL: @insert0_v4i16(
; LE-NEXT:    [[TMP1:%.*]] = and i64 [[X:%.*]], -65536
; LE-NEXT:    [[TMP2:%.*]] = zext i16 [[Y:%.*]] to i64
; LE-NEXT:    [[R:%.*]] = or i64 [[TMP1]], [[TMP2]]
; LE-NEXT:    ret i64 [[R]]
;
  %v = bitcast i64 %x to <4 x i16>
  %i = insertelement <4 x i16> %v, i16 %y, i8 0
  %r = bitcast <4 x i16> %i to i64
  ret i64 %r
}

; Negative test - shifts needed for both endians.
; Element 1 of a <4 x i16> is an interior element, so it is not the low 16 bits
; of the i64 under either byte order; the sequence is expected to be left alone
; for both BE and LE (hence the shared ALL checks).

define i64 @insert1_v4i16(i64 %x, i16 %y) {
; ALL-LABEL: @insert1_v4i16(
; ALL-NEXT:    [[V:%.*]] = bitcast i64 [[X:%.*]] to <4 x i16>
; ALL-NEXT:    [[I:%.*]] = insertelement <4 x i16> [[V]], i16 [[Y:%.*]], i64 1
; ALL-NEXT:    [[R:%.*]] = bitcast <4 x i16> [[I]] to i64
; ALL-NEXT:    ret i64 [[R]]
;
  %v = bitcast i64 %x to <4 x i16>
  %i = insertelement <4 x i16> %v, i16 %y, i8 1
  %r = bitcast <4 x i16> %i to i64
  ret i64 %r
}

; Insert into element 3 (the last element) of a <4 x i16> that is a bitcast
; view of an i64.
; i64 is a legal type (both RUN lines specify "n64" in the data layout), so we
; can convert based on the data layout.
; Endian determines if a shift is needed (and so the transform is avoided):
;   BE - the inserted i16 lands in the low 16 bits, so the expected output is
;        mask (and -65536) + zext + or, with no shift.
;   LE - a shift would be needed, so the bitcast/insertelement/bitcast
;        sequence is expected to be left alone.

define i64 @insert3_v4i16(i64 %x, i16 %y) {
; BE-LABEL: @insert3_v4i16(
; BE-NEXT:    [[TMP1:%.*]] = and i64 [[X:%.*]], -65536
; BE-NEXT:    [[TMP2:%.*]] = zext i16 [[Y:%.*]] to i64
; BE-NEXT:    [[R:%.*]] = or i64 [[TMP1]], [[TMP2]]
; BE-NEXT:    ret i64 [[R]]
;
; LE-LABEL: @insert3_v4i16(
; LE-NEXT:    [[V:%.*]] = bitcast i64 [[X:%.*]] to <4 x i16>
; LE-NEXT:    [[I:%.*]] = insertelement <4 x i16> [[V]], i16 [[Y:%.*]], i64 3
; LE-NEXT:    [[R:%.*]] = bitcast <4 x i16> [[I]] to i64
; LE-NEXT:    ret i64 [[R]]
;
  %v = bitcast i64 %x to <4 x i16>
  %i = insertelement <4 x i16> %v, i16 %y, i8 3
  %r = bitcast <4 x i16> %i to i64
  ret i64 %r
}

; Negative test - i128 is not a legal type, so we do not convert based on the data layout.
; Both RUN lines declare only "n64" as a native integer width, so even the
; element-0 insert is expected to be left alone for both endians (hence the
; shared ALL checks).

define i128 @insert0_v4i32(i128 %x, i32 %y) {
; ALL-LABEL: @insert0_v4i32(
; ALL-NEXT:    [[V:%.*]] = bitcast i128 [[X:%.*]] to <4 x i32>
; ALL-NEXT:    [[I:%.*]] = insertelement <4 x i32> [[V]], i32 [[Y:%.*]], i64 0
; ALL-NEXT:    [[R:%.*]] = bitcast <4 x i32> [[I]] to i128
; ALL-NEXT:    ret i128 [[R]]
;
  %v = bitcast i128 %x to <4 x i32>
  %i = insertelement <4 x i32> %v, i32 %y, i8 0
  %r = bitcast <4 x i32> %i to i128
  ret i128 %r
}

; Negative test - extra use requires more instructions.
; Here the extra use is on the first bitcast (%v): it is passed to @use, so it
; must stay alive and the sequence is expected to be left alone for both
; endians (hence the shared ALL checks).

define i16 @insert0_v2i8_use1(i16 %x, i8 %y) {
; ALL-LABEL: @insert0_v2i8_use1(
; ALL-NEXT:    [[V:%.*]] = bitcast i16 [[X:%.*]] to <2 x i8>
; ALL-NEXT:    call void @use(<2 x i8> [[V]])
; ALL-NEXT:    [[I:%.*]] = insertelement <2 x i8> [[V]], i8 [[Y:%.*]], i64 0
; ALL-NEXT:    [[R:%.*]] = bitcast <2 x i8> [[I]] to i16
; ALL-NEXT:    ret i16 [[R]]
;
  %v = bitcast i16 %x to <2 x i8>
  call void @use(<2 x i8> %v)
  %i = insertelement <2 x i8> %v, i8 %y, i8 0
  %r = bitcast <2 x i8> %i to i16
  ret i16 %r
}

; Negative test - extra use requires more instructions.
; Here the extra use is on the insertelement result (%i): it is passed to @use,
; so it must stay alive and the sequence is expected to be left alone for both
; endians (hence the shared ALL checks).

define i16 @insert0_v2i8_use2(i16 %x, i8 %y) {
; ALL-LABEL: @insert0_v2i8_use2(
; ALL-NEXT:    [[V:%.*]] = bitcast i16 [[X:%.*]] to <2 x i8>
; ALL-NEXT:    [[I:%.*]] = insertelement <2 x i8> [[V]], i8 [[Y:%.*]], i64 0
; ALL-NEXT:    call void @use(<2 x i8> [[I]])
; ALL-NEXT:    [[R:%.*]] = bitcast <2 x i8> [[I]] to i16
; ALL-NEXT:    ret i16 [[R]]
;
  %v = bitcast i16 %x to <2 x i8>
  %i = insertelement <2 x i8> %v, i8 %y, i8 0
  call void @use(<2 x i8> %i)
  %r = bitcast <2 x i8> %i to i16
  ret i16 %r
}
